1. 读取输入数据 | Load Input Data

# 设置工作目录 | Set working directory
# setwd("your_data_path")

# Load the four input CSV tables; text columns are kept as character vectors
# rather than being converted to factors.
practice_data <- read.csv(
  file = "easy_input_practice.csv",
  stringsAsFactors = FALSE
)
hypoxia_data <- read.csv(
  file = "easy_input_hypoxia.csv",
  stringsAsFactors = FALSE
)
gene_ann <- read.csv(
  file = "easy_input_geneAnn.csv",
  stringsAsFactors = FALSE
)
drug_ann <- read.csv(
  file = "easy_input_drugAnn.csv",
  stringsAsFactors = FALSE
)

# Preview the first six rows of every input table.
# Lines that start with "##" are the console output recorded with this script.
cat("=== 药物注释数据 | Drug Annotation ===\n")
## === 药物注释数据 | Drug Annotation ===
print(head(drug_ann, n = 6L))
##     drug       therapy n_sensitive n_resistant   signaling
## 1 Drug 1 Immunotherapy           2           1 Signaling 1
## 2 Drug 2 Immunotherapy           2           1 Signaling 2
## 3 Drug 3 Immunotherapy           3           3 Signaling 3
## 4 Drug 4 Immunotherapy           4           2 Signaling 3
## 5 Drug 5 Immunotherapy          NA           3 Signaling 4
## 6 Drug 6  Chemotherapy           3           3 Signaling 5
cat("\n=== 基因注释数据 | Gene Annotation ===\n")
## 
## === 基因注释数据 | Gene Annotation ===
print(head(gene_ann, n = 6L))
##     gene   drug
## 1 Gene 1 Drug 1
## 2 Gene 2 Drug 2
## 3 Gene 2 Drug 3
## 4 Gene 3 Drug 3
## 5 Gene 4 Drug 4
## 6 Gene 5 Drug 4
cat("\n=== 低氧数据 | Hypoxia Data ===\n")
## 
## === 低氧数据 | Hypoxia Data ===
print(head(hypoxia_data, n = 6L))
##     cancer   gene direction       omics
## 1 Cancer10 Gene26      high    Mutation
## 2 Cancer 5 Gene23       low Methylation
## 3 Cancer 3 Gene25       low    Mutation
## 4 Cancer 3 Gene27       low    Mutation
## 5 Cancer 5 Gene21      high    Mutation
## 6 Cancer 4 Gene16       low     Protein
cat("\n=== 实践数据 | Practice Data ===\n")
## 
## === 实践数据 | Practice Data ===
print(head(practice_data, n = 6L))
##                 x
## 1 Gene21-Cancer 5
## 2 Gene13-Cancer 9
## 3 Gene 2-Cancer 9
## 4 Gene 9-Cancer 4
## 5 Gene16-Cancer 5
## 6 Gene 5-Cancer 9
# Data cleaning: strip leading/trailing whitespace from every identifier that
# is later used as a join or grouping key, so the same gene/drug/cancer label
# compares equal across the three tables.
hypoxia_data$gene <- str_trim(hypoxia_data$gene)
hypoxia_data$cancer <- str_trim(hypoxia_data$cancer)
gene_ann$gene <- str_trim(gene_ann$gene)
# gene_ann$drug is joined against drug_ann$drug when the middle panel is
# built, so both sides of that key are trimmed, not just drug_ann$drug.
gene_ann$drug <- str_trim(gene_ann$drug)
drug_ann$drug <- str_trim(drug_ann$drug)

2. 左侧图:基因-癌症关联矩阵 | Left Panel: Gene-Cancer Association Matrix

# Left-panel preparation: count the hypoxia associations of every gene and
# rank the genes from most to fewest associations.
gene_counts <- summarise(
  group_by(hypoxia_data, gene),
  n_assoc = n(),
  .groups = "drop"
)
left_panel_data <- arrange(gene_counts, desc(n_assoc))

# This ranking is reused by the later panels so that every panel lists the
# genes in the same order.
gene_order <- left_panel_data[["gene"]]

cat("左侧面板基因顺序 | Gene order in left panel:\n")
## 左侧面板基因顺序 | Gene order in left panel:
print(gene_order)
##  [1] "Gene21" "Gene23" "Gene28" "Gene 6" "Gene16" "Gene 1" "Gene 7" "Gene 9"
##  [9] "Gene20" "Gene 2" "Gene 8" "Gene12" "Gene17" "Gene18" "Gene19" "Gene22"
## [17] "Gene24" "Gene25" "Gene26" "Gene 3" "Gene 4" "Gene10" "Gene13" "Gene14"
## [25] "Gene27" "Gene29" "Gene 5" "Gene15" "Gene30" "Gene11"
# Point shape (pch code) drawn for each omics layer.
omics_shape_map <- c(
  "mRNA" = 15,          # square
  "Protein" = 16,       # circle
  "Methylation" = 17,   # triangle
  "Mutation" = 18,      # diamond
  "CNV" = 8             # asterisk
)

# Cancer types in a locale-independent order. method = "radix" always collates
# in the C locale, where the padding space in "Cancer 5" sorts before the digit
# in "Cancer10". The default sort method follows the session locale, so the
# x-axis order could otherwise differ between machines.
cancer_order <- sort(unique(hypoxia_data$cancer), method = "radix")

# Plotting data: one row per gene-cancer association, with gene and cancer
# turned into factors so that the axes follow gene_order / cancer_order.
plot_data_left <- hypoxia_data %>%
  mutate(
    # Convenience columns; the plot below maps omics and direction through
    # manual scales and does not read these two columns.
    shape = omics_shape_map[omics],
    color = ifelse(direction == "high", "red", "blue"),
    gene = factor(gene, levels = gene_order),
    cancer = factor(cancer, levels = cancer_order)
  ) %>%
  arrange(gene)

# Left panel: cancers on x, genes on y, point shape = omics layer,
# point colour = direction of the association.
p_left <- ggplot(plot_data_left, aes(x = cancer, y = gene)) +
  geom_point(aes(shape = omics, color = direction),
             size = 4, alpha = 0.9, stroke = 1.2) +
  scale_shape_manual(
    name = "omics",
    values = omics_shape_map,
    breaks = names(omics_shape_map)
  ) +
  scale_color_manual(
    name = "direction",
    values = c("high" = "#E41A1C", "low" = "#377EB8"),
    breaks = c("high", "low")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
    axis.text.y = element_text(size = 8),
    axis.title = element_blank(),
    panel.grid.major = element_line(color = "gray95", linewidth = 0.2),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9),
    plot.title = element_text(hjust = 0, size = 12, face = "bold", margin = margin(b = 10))
  ) +
  labs(title = "A. Gene-Cancer Association Matrix")

print(p_left)

3. 中间图:基因-药物连接线(保持左侧基因顺序) | Middle Panel: Gene-Drug Connection Lines (Consistent Gene Order)

# Middle-panel preparation: gene-drug links.
# semi_join() keeps only the gene_ann rows whose gene occurs in hypoxia_data.
# (A left_join() on the key column alone adds no columns and drops no rows,
# so it cannot act as this filter.)
middle_data <- gene_ann %>%
  semi_join(hypoxia_data, by = "gene") %>%
  # Order the links by the left panel's gene ranking. gene stays a character
  # column so that the join with gene_y_map below compares like with like.
  arrange(match(gene, gene_order))

# Drugs receive consecutive y positions in order of first appearance.
drug_list <- unique(middle_data$drug)
drug_pos <- data.frame(
  drug = drug_list,
  drug_y = seq_along(drug_list)
)

# Gene y positions follow the left panel's gene order.
gene_y_map <- data.frame(
  gene = gene_order,
  gene_y = seq_along(gene_order)
)

# One row per gene-drug link with both endpoints' y positions, the drug's
# therapy/signaling annotation and the colour of the connecting line.
middle_plot_data <- middle_data %>%
  left_join(gene_y_map, by = "gene") %>%
  left_join(drug_pos, by = "drug") %>%
  left_join(
    drug_ann %>% select(drug, therapy, signaling),
    by = "drug"
  ) %>%
  mutate(
    # Line colour by signaling group; anything unmatched (including a missing
    # annotation) falls through to gray.
    line_color = case_when(
      signaling %in% c("Signaling 1", "Signaling 2") ~ "#9370DB",  # purple
      signaling %in% c("Signaling 3", "Signaling 4", "Signaling 5", "Signaling 6") ~ "#FF4444",  # red
      signaling == "Signaling 7" ~ "#4169E1",  # blue
      signaling %in% c("Signaling 8", "Signaling 9", "Signaling10") ~ "#FFA500",  # orange
      TRUE ~ "#CCCCCC"  # gray
    )
  )

cat("中间面板数据摘要 | Middle panel data summary:\n")
## 中间面板数据摘要 | Middle panel data summary:
cat("连接数量 | Number of connections:", nrow(middle_plot_data), "\n")
## 连接数量 | Number of connections: 36
cat("基因数量 | Number of genes:", n_distinct(middle_plot_data$gene), "\n")
## 基因数量 | Number of genes: 30
cat("药物数量 | Number of drugs:", n_distinct(middle_plot_data$drug), "\n\n")
## 药物数量 | Number of drugs: 20
# Middle panel: genes at x = 0, drugs at x = 1, one segment per gene-drug link.
p_middle <- ggplot(middle_plot_data) +
  # Connection lines, coloured by the precomputed line_color column
  geom_segment(
    aes(x = 0, y = gene_y, xend = 1, yend = drug_y, color = line_color),
    alpha = 0.35,
    linewidth = 0.5
  ) +
  # Gene points
  geom_point(
    data = gene_y_map,
    aes(x = 0, y = gene_y),
    size = 2.5,
    color = "steelblue",
    alpha = 0.8
  ) +
  # Drug points
  geom_point(
    data = drug_pos,
    aes(x = 1, y = drug_y),
    size = 2.5,
    color = "darkred",
    alpha = 0.8
  ) +
  # Gene labels, right-aligned to the left of the gene points
  geom_text(
    data = gene_y_map,
    aes(x = -0.08, y = gene_y, label = gene),
    hjust = 1,
    size = 2.5,
    color = "steelblue",
    fontface = "bold"
  ) +
  # Drug labels, left-aligned to the right of the drug points
  geom_text(
    data = drug_pos,
    aes(x = 1.08, y = drug_y, label = drug),
    hjust = 0,
    size = 2.5,
    color = "darkred",
    fontface = "bold"
  ) +
  # line_color already holds literal colour values
  scale_color_identity() +
  xlim(-0.25, 1.25) +
  # The y range must cover BOTH columns. Limiting it to the drug count alone
  # silently drops every gene (and its links) positioned above the last drug.
  ylim(0.5, max(gene_y_map$gene_y, drug_pos$drug_y) + 0.5) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold", margin = margin(b = 10))
  ) +
  labs(title = "B. Gene-Drug-Pathway Links")

print(p_middle)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).

4. 右侧图:药物响应条形图 | Right Panel: Drug Response Bar Chart

# Order in which therapy types are sorted and faceted.
therapy_levels <- c("Chemotherapy", "Hormone therapy",
                    "Immunotherapy", "Targeted therapy")

# Right-panel preparation: response counts for the drugs shown in the middle
# panel, plus colour annotations per therapy and per signaling group.
right_panel_data <- drug_ann %>%
  filter(drug %in% drug_list) %>%
  select(drug, therapy, signaling, n_sensitive, n_resistant) %>%
  mutate(
    # Missing counts are drawn as zero-length bars
    n_sensitive = ifelse(is.na(n_sensitive), 0, n_sensitive),
    n_resistant = ifelse(is.na(n_resistant), 0, n_resistant),
    # Colour per therapy type (kept as an annotation column; the plot below
    # does not read it)
    therapy_color = case_when(
      therapy == "Chemotherapy" ~ "#E41A1C",     # red
      therapy == "Hormone therapy" ~ "#377EB8",   # blue
      therapy == "Immunotherapy" ~ "#4DAF4A",     # green
      therapy == "Targeted therapy" ~ "#FF7F00",  # orange
      TRUE ~ "#999999"  # gray
    ),
    # Pale background colour per signaling group (annotation column only)
    signaling_bg = case_when(
      signaling %in% c("Signaling 1", "Signaling 2") ~ "#E6D7F0",  # light purple
      signaling %in% c("Signaling 3", "Signaling 4", "Signaling 5", "Signaling 6") ~ "#FFE6E6",  # light red
      signaling == "Signaling 7" ~ "#E6F0FF",  # light blue
      signaling %in% c("Signaling 8", "Signaling 9", "Signaling10") ~ "#FFF0E6",  # light orange
      TRUE ~ "#F5F5F5"  # light gray
    )
  )

# Sort by therapy, then signaling, and freeze that order in the drug factor.
right_panel_data <- right_panel_data %>%
  arrange(factor(therapy, levels = therapy_levels), signaling) %>%
  mutate(
    # unique() keeps factor() from failing on a repeated drug name
    drug = factor(drug, levels = unique(drug)),
    signaling_order = as.numeric(factor(signaling, levels = unique(signaling)))
  )

# Long format: one row per drug and response type.
right_plot_data <- right_panel_data %>%
  pivot_longer(
    cols = c(n_sensitive, n_resistant),
    names_to = "response_type",
    values_to = "count"
  ) %>%
  mutate(
    response_type = factor(
      response_type,
      levels = c("n_sensitive", "n_resistant")
    ),
    response_color = ifelse(response_type == "n_sensitive", "#2ECC71", "#E91E63")  # green / magenta
  )

# Right panel: horizontal bars of sensitive vs. resistant counts per drug,
# one facet per therapy type.
p_right <- ggplot(right_plot_data, aes(x = count, y = drug, fill = response_color)) +
  # Dodge the two response bars of a drug. With position = "identity" both
  # bars start at 0 on the same row and are painted over each other.
  geom_bar(stat = "identity", position = "dodge", alpha = 0.85, width = 0.7) +
  facet_wrap(
    ~factor(therapy, levels = therapy_levels),
    scales = "free_y",
    ncol = 1
  ) +
  scale_fill_identity(
    name = "cat",
    breaks = c("#2ECC71", "#E91E63"),
    labels = c("#2ECC71" = "n_sensitive", "#E91E63" = "n_resistant"),
    guide = guide_legend(override.aes = list(alpha = 0.85))
  ) +
  scale_x_continuous(
    limits = c(0, 12),
    breaks = seq(0, 12, 4),
    labels = c("0", "4", "8", "12")
  ) +
  theme_minimal() +
  theme(
    # No per-row colour vector here: vectorised input to element_text() is
    # not officially supported by ggplot2 and is applied to the labels by
    # position, not by drug, so it cannot follow the facets. The therapy of
    # each drug is already given by its facet strip.
    axis.text.y = element_text(size = 8),
    axis.text.x = element_text(size = 9),
    # Only the y title is blanked so the x title set in labs() is shown
    axis.title.x = element_text(size = 10),
    axis.title.y = element_blank(),
    strip.text = element_text(size = 10, face = "bold", margin = margin(b = 5)),
    strip.background = element_rect(fill = "gray95", color = "gray80"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9),
    plot.title = element_text(hjust = 0, size = 12, face = "bold", margin = margin(b = 10))
  ) +
  labs(title = "C. Drug Response Profile", x = "# Cancers")

print(p_right)

5. 组合完整图形 | Combine All Panels

# Assemble the three panels side by side with patchwork, collect their
# legends, and add an overall centred title.
overall_title <- "多层次基因-药物-通路关联分析 | Multi-level Gene-Drug-Pathway Association Analysis"
overall_title_theme <- theme(
  plot.title = element_text(hjust = 0.5, size = 14, face = "bold", margin = margin(b = 15))
)

panel_row <- p_left | p_middle | p_right
combined_plot <- panel_row +
  plot_layout(guides = "collect", widths = c(2.5, 1.5, 3)) +
  plot_annotation(title = overall_title, theme = overall_title_theme)

print(combined_plot)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).

# Save the figure as a PDF.
# The default pdf() device cannot encode the CJK characters of the overall
# title (it emits "conversion failure ... in 'mbcsToSbcs'" warnings and the
# characters are lost). The Cairo-based PDF device handles UTF-8 text, so use
# it whenever this R build provides Cairo and fall back to "pdf" otherwise.
pdf_device <- if (capabilities("cairo")) grDevices::cairo_pdf else "pdf"
ggsave(
  filename = "gene_drug_pathway_analysis.pdf",
  plot = combined_plot,
  device = pdf_device,
  width = 20,
  height = 14,
  dpi = 300,
  units = "in"
)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 多 (U+591A)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 层 (U+5C42)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 次 (U+6B21)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 基 (U+57FA)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 因 (U+56E0)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 药 (U+836F)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 物 (U+7269)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 通 (U+901A)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 路 (U+8DEF)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 关 (U+5173)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 联 (U+8054)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 分 (U+5206)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 析 (U+6790)
cat("\n✓ 图形已保存为 gene_drug_pathway_analysis.pdf\n")
## 
## ✓ 图形已保存为 gene_drug_pathway_analysis.pdf
# Also export the combined figure as a 300-dpi PNG on a 20 x 14 inch canvas.
ggsave(
  "gene_drug_pathway_analysis.png",
  combined_plot,
  units = "in",
  width = 20,
  height = 14,
  dpi = 300
)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).
cat("✓ 图形已保存为 gene_drug_pathway_analysis.png\n")
## ✓ 图形已保存为 gene_drug_pathway_analysis.png

6. 数据统计汇总 | Data Summary Statistics

# Console summary of the three data sets. Each section is framed by a rule of
# 60 "=" characters; lines starting with "##" are the recorded output.
section_rule <- strrep("=", 60)

cat("\n")
cat(section_rule, "\n")
## ============================================================
cat("基因-癌症关联统计 | Gene-Cancer Association Statistics\n")
## 基因-癌症关联统计 | Gene-Cancer Association Statistics
cat(section_rule, "\n")
## ============================================================
cat("总关联数 | Total associations:", nrow(hypoxia_data), "\n")
## 总关联数 | Total associations: 151
cat("独特基因数 | Number of unique genes:", n_distinct(hypoxia_data$gene), "\n")
## 独特基因数 | Number of unique genes: 30
cat("独特癌症类型数 | Number of unique cancers:", n_distinct(hypoxia_data$cancer), "\n")
## 独特癌症类型数 | Number of unique cancers: 10
cat("组学数据类型分布 | Omics type distribution:\n")
## 组学数据类型分布 | Omics type distribution:
print(table(hypoxia_data$omics))
## 
##         CNV Methylation        mRNA    Mutation     Protein 
##          27          23          37          35          29
cat("\n表达方向分布 | Expression direction distribution:\n")
## 
## 表达方向分布 | Expression direction distribution:
print(table(hypoxia_data$direction))
## 
## high  low 
##   71   80
cat("\n")
cat(section_rule, "\n")
## ============================================================
cat("基因-药物-通路关联统计 | Gene-Drug-Pathway Association Statistics\n")
## 基因-药物-通路关联统计 | Gene-Drug-Pathway Association Statistics
cat(section_rule, "\n")
## ============================================================
cat("总关联数 | Total associations:", nrow(middle_plot_data), "\n")
## 总关联数 | Total associations: 36
cat("涉及基因数 | Number of genes:", n_distinct(middle_plot_data$gene), "\n")
## 涉及基因数 | Number of genes: 30
cat("涉及药物数 | Number of drugs:", n_distinct(middle_plot_data$drug), "\n")
## 涉及药物数 | Number of drugs: 20
cat("涉及通路数 | Number of pathways:", n_distinct(middle_plot_data$signaling), "\n")
## 涉及通路数 | Number of pathways: 10
cat("\n")
cat(section_rule, "\n")
## ============================================================
cat("治疗方式分布 | Therapy Type Distribution\n")
## 治疗方式分布 | Therapy Type Distribution
cat(section_rule, "\n")
## ============================================================
print(table(drug_ann$therapy))
## 
##     Chemotherapy  Hormone therapy    Immunotherapy Targeted therapy 
##                5                5                5                5
cat("\n")
cat(section_rule, "\n")
## ============================================================
cat("通路分布 | Signaling Pathway Distribution\n")
## 通路分布 | Signaling Pathway Distribution
cat(section_rule, "\n")
## ============================================================
print(table(drug_ann$signaling))
## 
## Signaling 1 Signaling 2 Signaling 3 Signaling 4 Signaling 5 Signaling 6 
##           1           1           2           1           2           3 
## Signaling 7 Signaling 8 Signaling 9 Signaling10 
##           5           1           2           2